---
title: mahoudata
keywords: fastai
sidebar: home_sidebar
summary: "API details."
---

class PreProcess[source]

PreProcess(ctx)

Preprocessing class that groups all data preparation functions

PreProcess.clean_duplicates[source]

PreProcess.clean_duplicates()

Clean duplicates method

PreProcess.cols_munging[source]

PreProcess.cols_munging(dataframe, fillna=True)

Columns preparation method

PreProcess.scale_cols[source]

PreProcess.scale_cols(dataframe)

Min-max scaler for numeric columns

class RecommenderStrategyFactory[source]

RecommenderStrategyFactory(ctx)

Strategy factory

RecommenderStrategyFactory.createStrategy[source]

RecommenderStrategyFactory.createStrategy(strategy)

class NumericStrategy[source]

NumericStrategy(ctx)

Numeric-based recommender system

NumericStrategy.model_builder[source]

NumericStrategy.model_builder(dataframe)

NumericStrategy.exec_strategy[source]

NumericStrategy.exec_strategy(dataframe, distance='cosine')

Explore Data

# Load the raw dataset from the local CSV file.
df = pd.read_csv("./data/dataset-datathon.csv")
# Build a profiling report for the raw data; the html option asks for a full-width layout.
profile = ProfileReport(df, title='Pandas Profiling Report', html={'style':{'full_width':True}})
# Render the report inline in the notebook.
profile.to_notebook_iframe()

Remove duplicates

According to the profile report, about 60% of the rows are duplicates, so we remove them.

# Drop rows that are exact duplicates across all columns.
df_clean = df.drop_duplicates(
# Disabled alternative: compare rows on every column except 'vajilla'.
#subset = df.columns.difference(['vajilla'])
)
# Re-profile the de-duplicated data (rebinds `profile`) and render it inline.
profile = ProfileReport(df_clean, title='Pandas Profiling Report', html={'style':{'full_width':True}})
profile.to_notebook_iframe()

Run Recommender

# Context dict passed to the factory; 'numeric_cols' lists the numeric feature
# column names. How the context is consumed is not shown on this page.
context = {'numeric_cols' : ['lupulo_afrutado_citrico', 
                             'lupulo_floral_herbal','amargor', 'color', 
                             'maltoso', 'licoroso', 'afrutado', 'especias','acidez']
}

f = RecommenderStrategyFactory(context)

# Request the 'numeric' strategy -- presumably a NumericStrategy instance;
# confirm in RecommenderStrategyFactory.createStrategy.
strategy = f.createStrategy('numeric')

# Build the model input from the de-duplicated data.
datamodel = strategy.model_builder(df_clean)

# No distance argument is given, so exec_strategy uses its default distance='cosine'.
recommender_df = strategy.exec_strategy(datamodel)

# Display the result (a 486 x 486 table in the recorded output).
recommender_df
0 1 2 3 4 5 6 7 8 9 ... 476 477 478 479 480 481 482 483 484 485
0 0.000000 0.000000 0.042737 0.014204 0.019602 0.003507 0.046649 0.079535 0.019307 0.019307 ... 0.107993 0.039501 0.178008 0.153839 0.372661 0.048717 0.037445 0.063611 0.034118 0.033039
1 0.000000 0.000000 0.042737 0.014204 0.019602 0.003507 0.046649 0.079535 0.019307 0.019307 ... 0.107993 0.039501 0.178008 0.153839 0.372661 0.048717 0.037445 0.063611 0.034118 0.033039
2 0.042737 0.042737 0.000000 0.027731 0.111271 0.045083 0.139327 0.149810 0.109016 0.109016 ... 0.186975 0.019876 0.262107 0.083787 0.430263 0.024581 0.057169 0.110674 0.015178 0.009456
3 0.014204 0.014204 0.027731 0.000000 0.042773 0.014581 0.072928 0.073416 0.040805 0.040805 ... 0.122566 0.015192 0.194159 0.123382 0.377082 0.024367 0.027006 0.058254 0.011111 0.016657
4 0.019602 0.019602 0.111271 0.042773 0.000000 0.016331 0.029392 0.063220 0.008608 0.008608 ... 0.081491 0.092742 0.127279 0.209331 0.319723 0.095347 0.063383 0.051801 0.088428 0.089179
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
481 0.048717 0.048717 0.024581 0.024367 0.095347 0.044618 0.097419 0.099593 0.087663 0.087663 ... 0.106695 0.022456 0.219157 0.050305 0.295921 0.000000 0.027633 0.048033 0.017491 0.012491
482 0.037445 0.037445 0.057169 0.027006 0.063383 0.038345 0.038369 0.109984 0.049632 0.049632 ... 0.095466 0.046965 0.176550 0.113228 0.301670 0.027633 0.000000 0.049326 0.030405 0.046380
483 0.063611 0.063611 0.110674 0.058254 0.051801 0.046130 0.050065 0.060642 0.059130 0.059130 ... 0.027529 0.092380 0.117532 0.093103 0.154117 0.048033 0.049326 0.000000 0.089897 0.081997
484 0.034118 0.034118 0.015178 0.011111 0.088428 0.041219 0.107349 0.101764 0.073097 0.073097 ... 0.153602 0.004043 0.269176 0.114322 0.419526 0.017491 0.030405 0.089897 0.000000 0.008074
485 0.033039 0.033039 0.009456 0.016657 0.089179 0.036679 0.116344 0.099990 0.080648 0.080648 ... 0.145527 0.009101 0.260754 0.087771 0.393298 0.012491 0.046380 0.081997 0.008074 0.000000

486 rows × 486 columns

# Take column 1 of the result and sort it ascending so the smallest values come
# first; NaN entries end up last (pandas' default na_position='last').
# NOTE(review): presumably these are distances from item 1 to every other item -- confirm.
recommendations_example = pd.DataFrame(recommender_df[1].sort_values(ascending=True))
recommendations_example
1
0 0.000000
1 0.000000
452 0.000000
5 0.003507
305 0.003507
... ...
473 0.647605
142 0.660262
193 NaN
195 NaN
330 NaN

486 rows × 1 columns